""" function about the page
"""
import re
import sys
import urlparse

try:
    from BeautifulSoup import BeautifulSoup as BS
except:
    from bs4 import BeautifulSoup as BS


#def openUrl(opener, url, proxy, agent):
#    """ open url
#    
#    @params:opener: different opener
#    """
#    return opener.open()
# Link patterns.
# Each entry is ``(regex, group_index)``: ``re.findall`` returns one tuple of
# groups per match and ``group_index`` selects the group holding the URL
# (index 1 is the 2nd parenthesised group, i.e. the text between the quotes).
RE_PATTERN_URL = [
    # (?i): HTML attribute names are case-insensitive, so HREF= must match too.
    (r'(?i)href\s*=\s*(\'|\")(.+?)(\1)', 1),
]

# Only links whose scheme is one of these are returned by getLinks().
_LINK_SCHEMES = ("http://", "https://")


def getLinks(base_url, content):
    """ get all http(s) links of content

    Every pattern in RE_PATTERN_URL is applied to ``content``. Each URL
    found is handled as follows:

    * surrounding whitespace is stripped;
    * pure fragment links (``#...``) are skipped;
    * absolute ``http://`` / ``https://`` URLs are kept unchanged;
    * anything else (``/path``, ``page.html``, ``../up``, ``//host/x``)
      is resolved against ``base_url``;
    * results that are still not http(s) (``mailto:``, ``javascript:``,
      ...) are dropped.

    @params:base_url: URL of the page, used to resolve relative links
    @params:content: page source to scan
    @return: set of absolute http(s) URLs
    """
    links = set()
    for pattern, idx in RE_PATTERN_URL:
        for groups in re.findall(pattern, content):
            href = groups[idx].strip()
            if not href or href.startswith("#"):
                continue
            if not href.lower().startswith(_LINK_SCHEMES):
                # Relative reference: resolve it against the page URL. For a
                # URL with another scheme urljoin returns it unchanged, so the
                # check below filters it out.
                href = urlparse.urljoin(base_url, href)
                if not href.lower().startswith(_LINK_SCHEMES):
                    continue
            links.add(href)
    return links
